---
title: "Equivalence Testing for Regression Discontinuity Designs -- Application"
author: "Erin Hartman"
output: html_document
---

## Main Analysis
### Functions
```{r libraries, warning=FALSE, message=FALSE}
library(rddensity)
library(rdrobust)
library(gridExtra)
library(tidyverse)
library(dataverse)

## Last run date
print(Sys.time())

start_time <- Sys.time()

print(sessionInfo())
```

### Load Functions

Load the RDD Equivalence functions from Github.  At time of publication, the repository was at commit
`f3836d2d4cf663e86a9b69752cc28b0a521f556f` which is loaded below.
```{r}
devtools::source_url("https://github.com/ekhartman/rdd_equivalence/blob/f3836d2d4cf663e86a9b69752cc28b0a521f556f/RDD_equivalence_functions.R?raw=TRUE")
```

Custom plot functions.
```{r functions}
generate_plot = function(equiv_tests, panel.widths=c(1, 1, 5, 1, 1), display.names = NULL, var.rounding = 1, pval.rounding = 2, fdr_correct = FALSE, density = FALSE, xlim = NULL) {
  .e <- environment()
  if(fdr_correct) {
    equiv_tests$p.vals = p.adjust(equiv_tests$p.vals, method = "BH")  
  }
  
  if(density) {
    equiv_tests$inverted.lower = 1/equiv_tests$inverted.scaled
    equiv_tests$inverted.lower = paste0("[", round(equiv_tests$inverted.lower, 2), ", ", round(equiv_tests$inverted.scaled, 2), "]")
  }
  
  if(!is.null(display.names)) {
    equiv_tests$names = factor(display.names, levels = rev(display.names))
  } else {
    equiv_tests$names = factor(row.names(equiv_tests), levels = rev(row.names(equiv_tests)))
  }
  equiv_tests$const = rep(1, nrow(equiv_tests))
  equiv_tests = equiv_tests[nrow(equiv_tests):1, ]
  
  g = ggplot(equiv_tests, aes(x = names) )
  
   if(density) g = g + geom_linerange(aes(ymin = 1/inverted, ymax = inverted), size = 5, color = "darkgray", alpha = 0.9)
   if(!density) g = g + geom_linerange(aes(ymin = -1 * inverted.scaled, ymax = inverted.scaled), size = 5, color = "darkgray", alpha = 0.9)
   if(length(unique(equiv_tests$tol)) > 1){
     
   } else {
  
     lines = unique(equiv_tests$tol)
     if(density) {
       lower = 1/lines
     } else {
       lower = -1*lines
     }
     g = g + geom_hline(yintercept = c(lower, lines), linetype = 2, size = 0.75)
   }
   g = g  + scale_shape_identity() + geom_point(aes(y = observed.smd), color = "black", shape = 18, size = 4)
   g = (g + coord_flip() + theme_bw()
       + theme(
         axis.text.y = element_blank()
         , axis.ticks.y = element_blank()
         , axis.title.y = element_blank()
         , axis.text.x = element_text(size = 10)
         , axis.title.x = element_text(size = 10)
         , plot.title = element_text(hjust = 0.5))
       + labs(x=NULL, y = expression("Equivalence Confidence Interval"))
   )
   if(length(unique(equiv_tests$tol)) == 1) {
     g = g + ggtitle(paste0(ifelse(!density, "Continuity Tests", "Density Tests"),"  \n \n", "Equivalence Range: ", ifelse(!density, paste0("+/- ", round(unique(equiv_tests$tol), 2)), paste0("(", round(1/unique(equiv_tests$tol), 2), ", ", round(unique(equiv_tests$tol), 2), ")"))))
   } else {
     g = g + ggtitle(paste0("Equivalence Tests  \n", "Standard Deviation Units\n For Non-Binary Measures"))
   }
  
  if(!density) g_inv = ggplot(equiv_tests, aes(x = names, y = const, label = round(inverted, var.rounding)), environment = .e)
  if(density) g_inv = ggplot(equiv_tests, aes(x = names, y = const, label = inverted.lower), environment = .e)
  g_inv = g_inv + geom_text()
  g_inv = (g_inv + theme_bw() + coord_flip()
        + theme(panel.grid.minor=element_blank()
                , panel.grid.major=element_blank()
                #, axis.line.x = element_blank()
                , axis.text.x = element_text(color = "white", size = 14)
                , axis.ticks.x = element_line(color = "white")
                , axis.text.y = element_blank()
                , axis.ticks.y = element_blank()
                , axis.title.x = element_text(size = 14)
                , plot.title = element_text(hjust = 0.5)
        )
        + ylim(1-.05, 1.05)
        + ggtitle(paste0("Equivalence\nConfidence\nInterval", ifelse(!density, " (+/-)", "")))
        + labs(y = " ", x = NULL)
  )
  
  g_obs = ggplot(equiv_tests, aes(x = names, y = const, label = round(observed.diff, var.rounding)), environment = .e)
  g_obs = g_obs + geom_text()
  g_obs = (g_obs + theme_bw() + coord_flip()
           + theme(panel.grid.minor=element_blank()
                   , panel.grid.major=element_blank()
                   #, axis.line.x = element_blank()
                   , axis.text.x = element_text(color = "white", size = 14)
                   , axis.ticks.x = element_line(color = "white")
                   , axis.text.y = element_blank()
                   , axis.ticks.y = element_blank()
                   , axis.title.x = element_text(size = 14)
                   , plot.title = element_text(hjust = 0.5)
           )
           + ylim(1-.05, 1.05)
           + ggtitle(ifelse(!density, "Observed\nMean\nDifference", "Observed\nRatio\n"))
           + labs(y = " ", x = NULL)
  )
  
  g_pval = ggplot(equiv_tests, aes(x = names, y = const, label = round(p.vals, pval.rounding)), environment = .e)
  g_pval = g_pval + geom_text()
  g_pval = (g_pval + theme_bw() + coord_flip()
        + theme(panel.grid.minor=element_blank()
                , panel.grid.major=element_blank()
                #, axis.line.x = element_blank()
                , axis.text.x = element_text(color = "white", size = 14)
                , axis.ticks.x = element_line(color = "white")
                , axis.text.y = element_blank()
                , axis.ticks.y = element_blank()
                , axis.title.x = element_text(size = 14)
                , plot.title = element_text(hjust = 0.5)
        )
        + ylim(1-.05, 1.05)
        + ggtitle(ifelse(fdr_correct, "FDR\nCorrected\nP-value", "\n\nP-value"))
        + labs(y = " ", x = NULL)
  )
  
  g_var = ggplot(equiv_tests, aes(x = names, y = const, label = names))
  g_var = g_var + geom_text()
  g_var = (g_var + theme_bw() + coord_flip()
        + theme(panel.grid.minor=element_blank()
                , panel.grid.major=element_blank()
                #, axis.line.x = element_blank()
                , axis.text.x = element_text(color = "white", size = 14)
                , axis.ticks.x = element_line(color = "white")
                , axis.text.y = element_blank()
                , axis.ticks.y = element_blank()
                , panel.border = element_blank()
                , axis.title.x = element_text(size = 14)
        )
        + ylim(1-.05, 1.05)
        + ggtitle("\n \nVariable") + theme(plot.title = element_text(hjust = 0.5))
        + labs(y = " ", x = NULL)
  )
  
  grid.arrange(g_var, g_obs, g, g_inv, g_pval, ncol=5, nrow=1, widths= panel.widths, heights=c(4))
}

```

## Eggers et al. (2014) Re-analysis (main manuscript)

Get the Eggers et al. (2014) data from dataverse, located at `https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/24937`.

```{r, warning=FALSE, message = FALSE, results = FALSE}
writeBin(get_file("replication_main.tab", "doi:10.7910/DVN/24937"), 
         "replication_main.dta")

x <- foreign::read.dta("./replication_main.dta")
file.remove("./replication_main.dta")
```


```{r eggers_analysis, warning=FALSE, message = FALSE, results = FALSE}
x$rv[!is.na(x$share_first) & !is.na(x$share_third) & x$share_first - x$share_third < 5] = NA
x$outcome = x$lag_rv

## function to run analysis for each subset condition
by_condition = function(condition, data, eps_den = 1.5, eps_cont) {
  condition = as.vector(condition)
  den_est = rddensity(data$rv[condition], vce = "plugin", all = TRUE)

  equiv_den = rdd.tost.ratio(estL = den_est$hat$left, estR = den_est$hat$right, seL = den_est$sd_asy$left, seR = den_est$sd_asy$right, eps = eps_den)
  
  cont_est = rdrobust(y=data$outcome[condition],x=data$rv[condition], bwselect = "mserd")

  equiv_cont = rdd.equiv(cont_est$coef[3], cont_est$se[3], eps = eps_cont)
  
  equiv_tost = rdd.tost(cont_est$coef[3], cont_est$se[3], eps = eps_cont)
  
  return(bind_rows(
    data.frame(test = "cont", tol = eps_cont, inverted = equiv_cont$inverted, 
               inverted.scaled = equiv_cont$inverted, p.vals = equiv_cont$p, 
               observed.diff = cont_est$coef[3], observed.smd = cont_est$coef[3], 
               n_eff = sum(cont_est$N_h)),
    data.frame(test = "den", tol = eps_den, inverted = equiv_den$inverted, 
               inverted.scaled = equiv_den$inverted, p.vals = equiv_den$p, 
               observed.diff = den_est$hat$left / den_est$hat$right, observed.smd = den_est$hat$left / den_est$hat$right, 
               n_eff = den_est$N$eff_left + den_est$N$eff_left),
    data.frame(test = "cont_tost", tol = eps_cont, inverted = equiv_tost$inverted, 
               inverted.scaled = equiv_tost$inverted, p.vals = equiv_tost$p, 
               observed.diff = cont_est$coef[3], observed.smd = cont_est$coef[3], 
               n_eff = sum(cont_est$N_h))))
}

## each of the conditions, as defined in Eggers et al. (2014) replication file
conditions = as.data.frame(cbind(with(x, country == "USA" & office_type == "HOUSE OF REPRESENTATIVES" & year >= 1880 & year <= 2010 & !is.na(rv)),
                 with(x, country == "USA" & office_type == "HOUSE OF REPRESENTATIVES" & year >= 1880 & year <= 1944 & !is.na(rv)), 
                 with(x, country == "USA" & office_type == "HOUSE OF REPRESENTATIVES" & year >= 1946 & year <= 2010 & !is.na(rv)), 
                 with(x, country == "USA" & office_type == "STATEWIDE" & !is.na(rv)),
                 with(x, country == "USA" & office_type == "STATE LEGISLATURE" & !is.na(rv)),
                 with(x, country == "USA" & office_type == "MAYOR" & !is.na(rv)),
                 with(x, country == "CANADA" & !is.na(rv)),
                 with(x, country == "CANADA" & year >= 1867 & year <= 1911 & !is.na(rv)),
                 with(x, country == "CANADA" & year >= 1921 & !is.na(rv)),
                 with(x, country == "UK" & office_type == "HOUSE OF COMMONS" & !is.na(rv)),
                 with(x, country == "UK" & office_type == "LOCAL COUNCIL" & !is.na(rv)),
                 with(x, country == "GERMANY" & office_type == "BUNDESTAG" & !is.na(rv)),
                 with(x, country == "GERMANY" & office_type == "BAVARIA, MAYOR" & !is.na(rv)),
                 with(x, country == "FRANCE" & office_type == "NATIONAL ASSEMBLY" & !is.na(rv)),
                 with(x, country == "FRANCE" & office_type == "MUNICIPALITY" & !is.na(rv)),
                 with(x, country == "AUSTRALIA" & !is.na(rv)),
                 with(x, country == "NEW ZEALAND" & !is.na(rv)),
                 with(x, country == "INDIA" & !is.na(rv)),
                 with(x, country == "BRAZIL" & !is.na(rv)),
                 with(x, country == "MEXICO" & !is.na(rv)),
                 with(x, !is.na(rv))
                 ))

## run equivalence tests
tests = lapply(conditions, by_condition, data = x, eps_cont = 2.5) %>% bind_rows()
  
## add condition names
names = c("U.S., House of Reps, 1880-2010", "U.S., House of Reps, 1880-1944", "U.S., House of Reps, 1946-2010", "U.S., Statewide, 1946-2010", "U.S., State Legislature, 1990-2010", "U.S., Mayors, 1947-2007", "Canada, Commons, 1867-2011", "Canada, Commons, 1867-1911", "Canada, Commons, 1921-2011", "U.K., Commons, 1918-2010", "U.K., Local Councils, 1946-2010", "Germany, Bundestag, 1953-2009", "Bavaria, Mayors, 1948-2009", "France, National Assembly, 1958-2007", 
          "France, Municipalities, 2008", 
          "Australia, House of Reps, 1987-2007", 
          "New Zealand, Parliament, 1949-1987", 
          "India, Lower House, 1977-2004", "Brazil, Mayors, 2000-2008", "Mexico, Mayors, 1970-2009", "All Races Pooled")

tests$names = rep(names, each = 3)
```

### Figure 3 -- Application Continuity Tests
```{r, fig.width=10, fig.height=5.4, fig.align="center", warning=FALSE}
generate_plot(tests %>% filter(test == "cont") %>% select(-test, -names), display.names = paste(tests %>% filter(test == "cont") %>% select(names) %>% t(), paste0("(n_eff = ", t(tests %>% filter(test == "cont") %>% select(n_eff)), ")")), panel.widths = c(5.9, 1.5, 5, 1.75, 1.25), fdr_correct = TRUE, density = FALSE, var.rounding = 2)

pdf(file = "./eggers_cont_tests.pdf", width = 10, height = 5.4)
generate_plot(tests %>% filter(test == "cont") %>% select(-test, -names), display.names = paste(tests %>% filter(test == "cont") %>% select(names) %>% t(), paste0("(n_eff = ", t(tests %>% filter(test == "cont") %>% select(n_eff)), ")")), panel.widths = c(5.9, 1.5, 5, 1.75, 1.25), fdr_correct = TRUE, density = FALSE, var.rounding = 2)
dev.off()
```

### Figure 4 -- Application Density Tests
```{r, fig.width=10, fig.height=5.4, fig.align="center", warning=FALSE}
generate_plot(tests %>% filter(test == "den") %>% select(-test, -names), display.names = paste(names, paste0("(n_eff = ", t(tests %>% filter(test == "den") %>% select(n_eff)), ")")), panel.widths = c(5.9, 1.5, 5, 1.75, 1.25), fdr_correct = TRUE, density = TRUE, var.rounding = 2)


pdf(file = "./eggers_den_tests.pdf", width = 10, height = 5.4)
generate_plot(tests %>% filter(test == "den") %>% select(-test, -names), display.names = paste(names, paste0("(n_eff = ", t(tests %>% filter(test == "den") %>% select(n_eff)), ")")), panel.widths = c(5.9, 1.5, 5, 1.75, 1.25), fdr_correct = TRUE, density = TRUE, var.rounding = 2)
dev.off()
```

The following produces the point estimate and standard error for the US mayors race, used in footnote 18.
```{r footnote eps_undefined, warning=FALSE, message = FALSE, results = FALSE}
condition = with(x, country == "USA" & office_type == "MAYOR" & !is.na(rv))
est_mayor <- rdrobust(y = x$outcome[condition], x = x$rv[condition], bwselect = "mserd")
```

```{r}
# RBC estimates
est_mayor$coef[3]
  est_mayor$se[3]
# t-stat
est_mayor$coef[3] / est_mayor$se[3]
```


## Supplementary Materials
### Caughey and Sekhon (2011) Re-analysis

Get the Caughey and Sekhon (2011) data from dataverse, located at `https://dataverse.harvard.edu/dataset.xhtml?persistentId=hdl:1902.1/16357&version=1.0`.
```{r, warning=FALSE, message = FALSE, results = FALSE}
writeBin(get_file("RDReplication.tab", "hdl:1902.1/16357&version=1.0"), 
         "RDReplication.dta")

x <- foreign::read.dta("./RDReplication.dta")
file.remove("./RDReplication.dta")
```

```{r caughey_sekhon_analysis, warning=FALSE, message = FALSE, results = FALSE}
x <- subset(x, x$Use == 1 & !is.na(x$DifDPct) )

x$rv = x$DifDPct

## function to run analysis for each covariate
by_condition = function(outcome, data, eps_den = 1.5, eps_cont, eps_bin) {
  data = data[!is.na(data[, outcome]), ]
  
  data_sd = sd(data[,outcome], na.rm = TRUE)
  if(!((sum(c(0, 1) %in% unique(data[, outcome])) == 2) & length(unique(data[, outcome])) == 2) )  {
    data[,outcome] = data[,outcome]/data_sd
  } else {
    data_sd = 1
    eps_cont = eps_bin
  }
  cont_est = rdrobust(y=data[, outcome] , x=data$rv, bwselect = "mserd")

  equiv_cont = rdd.equiv(cont_est$coef[3], cont_est$se[3], eps = eps_cont)
  
  return(data.frame(test = "cont", tol = eps_cont, inverted = equiv_cont$inverted * data_sd, inverted.scaled = equiv_cont$inverted, p.vals = equiv_cont$p, observed.diff = cont_est$coef[3] * data_sd, observed.smd = cont_est$coef[3], n_eff = sum(cont_est$N_h), point_est = cont_est$coef[3], point_pv = cont_est$pv[3])
       )
}

## run test for each covariate in original analysis
tests = lapply(list('DWinPrv', 'DPctPrv', 'DifDPPrv', 'IncDWNOM1', 'DemInc', 
                                                       'NonDInc', 'PrvTrmsD', 'PrvTrmsO', 'RExpAdv', 'DExpAdv',
                                                       'ElcSwing', 'CQRating3', 'DSpndPct', 'DDonaPct', 'SoSDem',
                                                       'GovDem', 'DifPVDec', 'DemOpen', 'NonDOpen', 'OpenSeat',
                                                       'VtTotPct', 'GovWkPct', 'UrbanPct', 'BlackPct', 'ForgnPct'), by_condition, data = x, eps_cont = 0.36, eps_bin = 0.1) %>% bind_rows()

## append varaible names
names = c('Dem Win t - 1', 'Dem % t - 1', 'Dem % Margin t - 1', 'Inc\'s D1 NOMINATE', 'Dem Inc in Race', 'Rep Inc in Race', 'Dem\'s # Prev Terms', 'Rep\'s # Prev Terms', 'Rep Experience Adv', 'Dem Experience Adv', 'Partisan Swing', 'CQ Rating {-1, 0, 1}', 'Dem Spending %', 'Dem Donation %', 'Dem Sec of State', 'Dem Governor', 'Dem Pres % Margin', 'Dem-held Open Seat', 'Rep-held Open Seat', 'Open Seat', 'Voter Turnout %', 'Pct Gov\'t Worker', 'Pct Urban', 'Pct Black', 'Pct Foreign Born')

tests$names = names

tests = tests %>% arrange(desc(n_eff))
```

### Figure SI-2 -- Caughey and Sekhon (2011) continuity in pre-treatment covariates
```{r}
generate_plot(tests, display.names = paste(tests %>% filter(test == "cont") %>% select(names) %>% t(), paste0("(n_eff = ", t(tests %>% filter(test == "cont") %>% select(n_eff)), ")")), panel.widths = c(3, 1.5, 5, 1.75, 1.25), fdr_correct = TRUE, density = FALSE, var.rounding = 2)

pdf(file = "./caughey_sekhon_cont_tests.pdf", width = 11, height = 5.4)
generate_plot(tests, display.names = paste(tests %>% filter(test == "cont") %>% select(names) %>% t(), paste0("(n_eff = ", t(tests %>% filter(test == "cont") %>% select(n_eff)), ")")), panel.widths = c(3, 1.5, 5, 1.75, 1.25), fdr_correct = TRUE, density = FALSE, var.rounding = 2)
dev.off()
```

Total run time is `r round(difftime(Sys.time(), start_time, units = "min"), 2)` minutes.